Question 1


In [62]:
import pandas as pd
import numpy as np

In [80]:
# energy = pd.read_excel('./Energy Indicators.xls')
# energy = (energy
#          .drop(['Unnamed: 0', 'Unnamed: 1'], axis=1))
# energy.iloc[240:245,:]


Out[80]:
Environmental Indicators: Energy Unnamed: 3 Unnamed: 4 Unnamed: 5
240 Yemen 344 13 0
241 Zambia 400 26 99.7147
242 Zimbabwe 480 32 52.5361
243 NaN NaN NaN NaN
244 NaN NaN NaN NaN

Load Energy Indicator Data


In [125]:
energy = pd.read_excel('./Energy Indicators.xls')
energy = (energy
         .drop(['Unnamed: 0', 'Unnamed: 1'], axis=1)
         .iloc[16:243,:]
         .replace('...', np.nan)
         .reset_index(drop=True))

energy.columns = ['Country', 'Energy Supply', 'Energy Supply per Capita', '% Renewable']
#energy.tail()


Out[125]:
Country Energy Supply Energy Supply per Capita % Renewable
222 Viet Nam 2554.0 28.0 45.32152
223 Wallis and Futuna Islands 0.0 26.0 0.00000
224 Yemen 344.0 13.0 0.00000
225 Zambia 400.0 26.0 99.71467
226 Zimbabwe 480.0 32.0 52.53612

In [126]:
energy['Energy Supply'] *= 1000000
energy['Country'].replace('[0-9]+', '', regex=True, inplace=True)

energy['Country'].replace({"Republic of Korea": "South Korea",
"United States of America": "United States",
"United Kingdom of Great Britain and Northern Ireland": "United Kingdom",
"China, Hong Kong Special Administrative Region": "Hong Kong"}, inplace=True)

energy.replace('\([^)]*\)', '', regex=True, inplace=True)

energy['Country'] = energy['Country'].str.strip()
#energy.head(3)


Out[126]:
Country Energy Supply Energy Supply per Capita % Renewable
0 Afghanistan 3.210000e+08 10.0 78.66928
1 Albania 1.020000e+08 35.0 100.00000
2 Algeria 1.959000e+09 51.0 0.55101

In [ ]:


In [127]:
' a b '.strip()


Out[127]:
'a b'

Load GDP Data


In [141]:
GDP = pd.read_csv('./API_NY.GDP.MKTP.CD_DS2_en_csv_v2/API_NY.GDP.MKTP.CD_DS2_en_csv_v2.csv', 
                  skiprows=3)
GDP.head()


Out[141]:
Country Name Country Code Indicator Name Indicator Code 1960 1961 1962 1963 1964 1965 ... 2008 2009 2010 2011 2012 2013 2014 2015 2016 Unnamed: 61
0 Aruba ABW GDP (current US$) NY.GDP.MKTP.CD NaN NaN NaN NaN NaN NaN ... 2.791961e+09 2.498933e+09 2.467704e+09 2.584464e+09 NaN NaN NaN NaN NaN NaN
1 Afghanistan AFG GDP (current US$) NY.GDP.MKTP.CD 5.377778e+08 5.488889e+08 5.466667e+08 7.511112e+08 8.000000e+08 1.006667e+09 ... 1.019053e+10 1.248694e+10 1.593680e+10 1.793024e+10 2.053654e+10 2.004633e+10 2.005019e+10 1.970299e+10 1.946902e+10 NaN
2 Angola AGO GDP (current US$) NY.GDP.MKTP.CD NaN NaN NaN NaN NaN NaN ... 8.417803e+10 7.549238e+10 8.247091e+10 1.041159e+11 1.153984e+11 1.249121e+11 1.267769e+11 1.029622e+11 8.963316e+10 NaN
3 Albania ALB GDP (current US$) NY.GDP.MKTP.CD NaN NaN NaN NaN NaN NaN ... 1.288135e+10 1.204421e+10 1.192695e+10 1.289087e+10 1.231978e+10 1.278103e+10 1.321986e+10 1.139037e+10 1.192689e+10 NaN
4 Andorra AND GDP (current US$) NY.GDP.MKTP.CD NaN NaN NaN NaN NaN NaN ... 4.001201e+09 3.650083e+09 3.346517e+09 3.427023e+09 3.146152e+09 3.248925e+09 NaN NaN NaN NaN

5 rows × 62 columns


In [139]:
GDP['Country Name'].replace({"Korea, Rep.": "South Korea", 
                            "Iran, Islamic Rep.": "Iran",
                            "Hong Kong SAR, China": "Hong Kong"},
                            inplace=True)
GDP = GDP.rename(columns={'Country Name': 'Country'})
#GDP['Country Name'].unique().tolist() == GDP['Country Name'].tolist()

cols = [ 'Country','2006', '2007', '2008', '2009', '2010', '2011',
        '2012', '2013', '2014', '2015']
# get rid of cols
GDP_to_merge = GDP[cols]

GDP_to_merge.head(3)


Out[139]:
Country 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
0 Aruba 2.421475e+09 2.623726e+09 2.791961e+09 2.498933e+09 2.467704e+09 2.584464e+09 NaN NaN NaN NaN
1 Afghanistan 7.057598e+09 9.843842e+09 1.019053e+10 1.248694e+10 1.593680e+10 1.793024e+10 2.053654e+10 2.004633e+10 2.005019e+10 1.970299e+10
2 Angola 4.178948e+10 6.044892e+10 8.417803e+10 7.549238e+10 8.247091e+10 1.041159e+11 1.153984e+11 1.249121e+11 1.267769e+11 1.029622e+11

In [130]:
'Iran' in GDP['Country']


Out[130]:
False

Load Country Rank Data for Energy Engineering and Power Tech.


In [131]:
ScimEn = pd.read_excel('./scimagojr.xlsx')
ScimEn_15 = ScimEn[ScimEn['Rank'] <= 15]
ScimEn_15


Out[131]:
Rank Country Documents Citable documents Citations Self-citations Citations per document H index
0 1 China 147887 147512 856806 583858 5.79 162
1 2 United States 113579 111426 1085684 370574 9.56 259
2 3 Japan 34294 34054 275980 73491 8.05 145
3 4 United Kingdom 24328 23671 278694 52119 11.46 159
4 5 India 21450 21183 179494 54929 8.37 132
5 6 Russian Federation 21259 20915 45629 17368 2.15 65
6 7 Germany 20898 20640 193676 39615 9.27 140
7 8 Canada 20689 20353 285554 53955 13.80 165
8 9 France 15584 15387 173959 37411 11.16 129
9 10 South Korea 14037 13952 151281 29670 10.78 116
10 11 Italy 13662 13457 154242 37030 11.29 121
11 12 Spain 11002 10886 167492 31489 15.22 130
12 13 Iran 10969 10872 94111 31251 8.58 85
13 14 Australia 10616 10496 129788 22759 12.23 123
14 15 Brazil 10599 10521 84010 20271 7.93 97

Merge Dataframes


In [132]:
df_merged = pd.merge(pd.merge(ScimEn_15, energy ,how='inner', on='Country'),
                    GDP_to_merge,how='inner', on='Country')

df_merged.set_index('Country', inplace=True)
df_merged.columns.tolist()


Out[132]:
['Rank',
 'Documents',
 'Citable documents',
 'Citations',
 'Self-citations',
 'Citations per document',
 'H index',
 'Energy Supply',
 'Energy Supply per Capita',
 '% Renewable',
 '2006',
 '2007',
 '2008',
 '2009',
 '2010',
 '2011',
 '2012',
 '2013',
 '2014',
 '2015']

In [133]:
df_merged.shape


Out[133]:
(15, 20)

In [134]:
'Zimbabwe' in df_merged.index.tolist()


Out[134]:
False

In [137]:
def answer_one():
    import pandas as pd
    import numpy as np
    # Load Energy Data
    energy = pd.read_excel('./Energy Indicators.xls')
    energy = (energy
         .drop(['Unnamed: 0', 'Unnamed: 1'], axis=1)
         .iloc[16:243,:]
         .replace('...', np.nan)
         .reset_index(drop=True))
    
    energy.columns = ['Country', 'Energy Supply', 'Energy Supply per Capita',
                      '% Renewable']
    energy['Energy Supply'] *= 1000000
    energy['Country'].replace('[0-9]+', '', regex=True, inplace=True)
    
    energy['Country'].replace({"Republic of Korea": "South Korea",
    "United States of America": "United States",
    "United Kingdom of Great Britain and Northern Ireland": "United Kingdom",
    "China, Hong Kong Special Administrative Region": "Hong Kong"}, inplace=True)
    
    energy.replace('\([^)]*\)', '', regex=True, inplace=True)
    energy['Country'] = energy['Country'].str.strip()
    
    # Load GDP Data
    GDP = pd.read_csv('./API_NY.GDP.MKTP.CD_DS2_en_csv_v2/API_NY.GDP.MKTP.CD_DS2_en_csv_v2.csv', 
                  skiprows=3)
    GDP['Country Name'].replace({"Korea, Rep.": "South Korea", 
                            "Iran, Islamic Rep.": "Iran",
                            "Hong Kong SAR, China": "Hong Kong"},
                            inplace=True)
    GDP = GDP.rename(columns={'Country Name': 'Country'})
    
    cols = [ 'Country','2006', '2007', '2008', '2009', '2010', '2011',
        '2012', '2013', '2014', '2015']
    GDP_to_merge = GDP[cols]
    
    # Load Country Rank data
    ScimEn = pd.read_excel('./scimagojr.xlsx')
    ScimEn_15 = ScimEn[ScimEn['Rank'] <= 15]
    
    df_merged = pd.merge(pd.merge(ScimEn_15, energy ,how='inner', on='Country'),
                    GDP_to_merge,how='inner', on='Country')

    df_merged.set_index('Country', inplace=True)
    answer = df_merged
    return answer
answer_one()


Out[137]:
Rank Documents Citable documents Citations Self-citations Citations per document H index Energy Supply Energy Supply per Capita % Renewable 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
Country
China 1 147887 147512 856806 583858 5.79 162 1.271910e+11 93.0 19.754910 2.752132e+12 3.552183e+12 4.598205e+12 5.109954e+12 6.100620e+12 7.572554e+12 8.560547e+12 9.607224e+12 1.048237e+13 1.106466e+13
United States 2 113579 111426 1085684 370574 9.56 259 9.083800e+10 286.0 11.570980 1.385589e+13 1.447764e+13 1.471858e+13 1.441874e+13 1.496437e+13 1.551793e+13 1.615526e+13 1.669152e+13 1.739310e+13 1.803665e+13
Japan 3 34294 34054 275980 73491 8.05 145 1.898400e+10 149.0 10.232820 4.530377e+12 4.515265e+12 5.037908e+12 5.231383e+12 5.700098e+12 6.157460e+12 6.203213e+12 5.155717e+12 4.848733e+12 4.383076e+12
United Kingdom 4 24328 23671 278694 52119 11.46 159 7.920000e+09 124.0 10.600470 2.678278e+12 3.063005e+12 2.875463e+12 2.367127e+12 2.429680e+12 2.608825e+12 2.646003e+12 2.719509e+12 2.998834e+12 2.861091e+12
India 5 21450 21183 179494 54929 8.37 132 3.319500e+10 26.0 14.969080 9.203165e+11 1.201112e+12 1.186953e+12 1.323940e+12 1.656617e+12 1.823050e+12 1.827638e+12 1.856722e+12 2.035393e+12 2.111751e+12
Russian Federation 6 21259 20915 45629 17368 2.15 65 3.070900e+10 214.0 17.288680 9.899305e+11 1.299705e+12 1.660844e+12 1.222644e+12 1.524916e+12 2.031769e+12 2.170144e+12 2.230625e+12 2.063662e+12 1.365865e+12
Germany 7 20898 20640 193676 39615 9.27 140 1.326100e+10 165.0 17.901530 3.002446e+12 3.439953e+12 3.752366e+12 3.418005e+12 3.417095e+12 3.757698e+12 3.543984e+12 3.752514e+12 3.879277e+12 3.363600e+12
Canada 8 20689 20353 285554 53955 13.80 165 1.043100e+10 296.0 61.945430 1.315415e+12 1.464977e+12 1.549131e+12 1.371153e+12 1.613464e+12 1.788648e+12 1.824289e+12 1.842628e+12 1.792883e+12 1.552808e+12
France 9 15584 15387 173959 37411 11.16 129 1.059700e+10 166.0 17.020280 2.325012e+12 2.663113e+12 2.923466e+12 2.693827e+12 2.646837e+12 2.862680e+12 2.681416e+12 2.808511e+12 2.849305e+12 2.433562e+12
South Korea 10 14037 13952 151281 29670 10.78 116 1.100700e+10 221.0 2.279353 1.011797e+12 1.122679e+12 1.002219e+12 9.019350e+11 1.094499e+12 1.202464e+12 1.222807e+12 1.305605e+12 1.411334e+12 1.382764e+12
Italy 11 13662 13457 154242 37030 11.29 121 6.530000e+09 109.0 33.667230 1.942634e+12 2.203053e+12 2.390729e+12 2.185160e+12 2.125058e+12 2.276292e+12 2.072823e+12 2.130491e+12 2.151733e+12 1.824902e+12
Spain 12 11002 10886 167492 31489 15.22 130 4.923000e+09 106.0 37.968590 1.264551e+12 1.479342e+12 1.634989e+12 1.499075e+12 1.431588e+12 1.488017e+12 1.335946e+12 1.361776e+12 1.375856e+12 1.192955e+12
Iran 13 10969 10872 94111 31251 8.58 85 9.172000e+09 119.0 5.707721 2.586457e+11 3.374745e+11 3.971896e+11 3.989781e+11 4.677902e+11 5.920378e+11 5.872094e+11 5.116209e+11 4.253261e+11 3.934361e+11
Australia 14 10616 10496 129788 22759 12.23 123 5.386000e+09 231.0 11.810810 7.475726e+11 8.537646e+11 1.055335e+12 9.271683e+11 1.142877e+12 1.390557e+12 1.538194e+12 1.567179e+12 1.459598e+12 1.345383e+12
Brazil 15 10599 10521 84010 20271 7.93 97 1.214900e+10 59.0 69.648030 1.107640e+12 1.397084e+12 1.695825e+12 1.667020e+12 2.208872e+12 2.616202e+12 2.465189e+12 2.472807e+12 2.455993e+12 1.803653e+12

In [ ]: